Normal Start Functions

Load isofiles

isofiles <-
  file.path(
    "Results",
    c(
      "180226_alkanes_C_GB",
      "180228_alkanes_C_GB",
      "180302_alkanes_C_GB"
    )
  ) %>%
  iso_read_continuous_flow()
## # A tibble: 20 x 4
##    file_id                  type  func           details                  
##    <chr>                    <chr> <chr>          <chr>                    
##  1 511__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
##  2 511__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
##  3 512__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
##  4 512__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
##  5 513__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
##  6 513__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
##  7 535__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
##  8 535__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
##  9 536__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 10 536__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 11 537__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 12 537__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 13 557__CO2 zero_6V_.dxf    error extract_dxf_r… cannot identify measured…
## 14 557__CO2 zero_6V_.dxf    error extract_isoda… `peak`, `start`, `rt`, `…
## 15 568__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 16 568__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 17 569__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 18 569__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 19 570__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 20 570__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…

Data

File Info

# all file info
isofiles %>% iso_get_file_info()
## Info: aggregating file info from 67 data file(s)

Vendor data table

isofiles %>% iso_get_vendor_data_table()
## Info: aggregating vendor data table without units from 67 data file(s)

Chromatograms

isofiles %>% 
  # fetch a few of the files of interst
  iso_filter_files(parse_number(Analysis) %in% c(557, 580)) %>% 
  # plot just mass 28
  iso_plot_continuous_flow_data(data = c(46)) %>% 
  # make interactive
  ggplotly(dynamicTicks = TRUE)
## Warning: You need the dev version of ggplot2 to use `dynamicTicks`

Analysis

Select relevant data

data_table <-
  isofiles %>%
  # filter the files you want to use 
  # --> exclude CO2 zeros
  iso_filter_files(!str_detect(`Identifier 1`, "CO2 zero")) %>% 
  # --> select only good analyses
  iso_filter_files(parse_number(Analysis) > 572) %>% 
  # get the vendor data table and file info
  iso_get_vendor_data_table(
    select = c(
      # peak info
      Nr., Start, Rt, End,
      # amplitudes and intensities
      `Ampl 44`, `Ampl 46`, `Intensity All`, `Intensity 44`,
      # ratios and deltas
      `R 46CO2/44CO2`, `d 13C/12C`
    ),
    include_file_info = c(
      file_datetime, Analysis, `Identifier 1`, `Identifier 2`, `GC Method`, `Seed Oxidation`
    )
  ) %>% 
  # rename some columns to be easier to work with
  rename(
    Ampl44=`Ampl 44`, Ampl46=`Ampl 46`, # amplitudes
    #Area28=`rIntensity 28`, Area29=`rIntensity 29`, # areas
    Intensity=`Intensity All`, #peak intensities
    R46C = `R 46CO2/44CO2`, d13C = `d 13C/12C`, # ratio and delta values
    rt = `Rt`,
    rt_start = `Start`,
    rt_end = `End`
    #File = `file_id`
  )
## Info: applying file filter, keeping 54 of 67 files
## Info: applying file filter, keeping 8 of 54 files
## Info: aggregating vendor data table without units from 8 data file(s), including file info 'c(file_datetime, Analysis, `Identifier 1`, `Identifier 2`, `GC Method`, 
##     `Seed Oxidation`)'
data_table

Map peaks

### CHANGE MAPPING FILE NAME ###
metadata_samples <- read_excel(file.path("metadata", "20180226_GB_metadata.xlsx"), sheet = "files")

metadata_peak_maps <- read_excel(file.path("metadata","20180226_GB_metadata.xlsx"), sheet = "maps")

metadata_samples
data_table_with_peaks <- data_table %>%
    iso_add_metadata(metadata_samples, match_by = c(`Identifier 1`, Analysis)) %>%
    iso_map_peaks(metadata_peak_maps) 
## Info: metadata added to 208 data rows, 0 left without metadata:
## - 1 metadata entries were mapped to 208 data entires via column 'Identifier 1'
## Info: 172 peaks in 8 files were successfully assigned, 36 could not be assigned, and 36 were missing
# missing and unidentified peaks
data_table_with_peaks %>% filter(!is_identified | is_missing)
# confirmed peaks
data_table_with_peaks <- data_table_with_peaks %>%
    filter(is_identified, !is_missing, !is_ambiguous)

data_table_with_peaks

Check stability of reference peaks

The second one is the one defined to be 0 permil (so will always be), the rest is relative to that peak.

p <- data_table_with_peaks %>% 
  filter(is_ref_peak == "yes") %>% 
  ggplot() + 
  aes(Nr., d13C, color = file_id) +
  geom_line() + 
  theme_bw()
ggplotly(p)

Add standard values

standards <- read_excel(file.path("metadata", "gc_irms_indiana_A6.xlsx"))
data_w_stds <- data_table_with_peaks %>% 
  filter(type == "standard", is_ref_peak == "no") %>% 
  left_join(standards, by = "compound") %>% 
  mutate(is_std = !is.na(true.d13C) | !is.na(true.d2H))

Process data

Focus on the analytes and calculate a few summary parameters we want to use later.

data_w_analyte_peaks <- 
  data_table_with_peaks %>% 
  # this is important so that the reference peaks are not caught up in the next set of calculations
  filter(is_ref_peak == "no") %>% 
  # for each analysis calculate averages across analysis
  group_by(Analysis) %>% 
  mutate(
    ampl_sample_mean.mV = mean(`Ampl44`), ampl_sample_sd.mV = sd(`Ampl44`),
    area_sample_mean.Vs = mean(`Intensity 44`), area_sample_sd.Vs = sd(`Intensity 44`)
  )

Standards

standards <- read_excel(file.path("metadata", "gc_irms_indiana_A6.xlsx"))
kable(standards)
compound true.d2H true.d13C
C16 -9.1 -26.15
C17 -117.8 -31.88
C18 -52.0 -32.70
C19 -56.3 -31.99
C20 -89.7 -33.97
C21 -177.8 -28.83
C22 -81.3 -33.77
C23 -67.2 -33.37
C24 -29.7 -32.13
C25 -263.0 -28.46
C26 -45.9 -32.94
C27 -172.8 -30.49
C28 -36.8 -33.20
C29 -177.8 -29.10
C30 -213.6 -29.84
data_w_stds <-
  data_w_analyte_peaks %>% 
  iso_add_standards(standards)  
## Info: added 12 standard entries to 84 out of 84 rows

Visualize standards

v <- data_w_stds %>% 
  ggplot() +
  aes(x = true.d13C, y = d13C, color = file_id) + 
  geom_smooth(method = "lm", se = FALSE, alpha = 0.5) +
  geom_point() +
  theme_bw() +
  theme(legend.position = "none") 
ggplotly(v)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Calibration

data_w_calibs <- data_w_stds %>% 
  # prepare for calibration by defining the grouping column(s) and setting default parameters
  iso_prepare_for_calibration(group_by = c(Analysis)) %>% 
  iso_set_default_process_parameters(delta_residual = resid_d13C) %>% 
  # run calibration
  iso_calibrate_delta(model = lm(`d13C` ~ true.d13C)) %>% 
  # pull out some columns we want generally available
  iso_unnest_calib_data(select = c(starts_with("Id"), ampl_sample_mean.mV)) 
## Info: preparing data for calibration by grouping based on 'Analysis' and nesting the grouped datasets into 'calibration_data'
## Info: calculating delta calibration fits based on 1 model ('lm(d13C ~ true.d13C)') for 8 data group(s) in 'calibration_data' with filter 'is_standard'; storing residuals in 'resid_d13C.'
data_w_calibs %>% 
  iso_unnest_delta_calib_summary(keep_other_list_data = FALSE) %>% 
  kable(digits =3)
Analysis Identifier 1 Identifier 2 ampl_sample_mean.mV r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual
573 A6 0.5uL 312.947 0.940 0.933 0.522 141.261 0 2 -7.354 20.708 21.902 2.452 9
574 A6 0.5uL 271.295 0.924 0.915 0.623 109.081 0 2 -9.300 24.601 25.794 3.494 9
575 A6 0.5uL 315.313 0.950 0.944 0.509 171.132 0 2 -7.085 20.170 21.363 2.335 9
576 A6 1.0uL 417.603 0.940 0.934 0.529 157.158 0 2 -8.300 22.600 24.055 2.802 10
577 A6 1.0uL 480.845 0.989 0.986 0.224 364.108 0 2 1.685 2.629 2.005 0.200 4
578 A6 1.0uL 432.851 0.946 0.940 0.508 158.919 0 2 -7.046 20.093 21.286 2.319 9
579 A6 1.0uL 452.555 0.937 0.930 0.562 133.978 0 2 -8.162 22.323 23.517 2.840 9
580 A6 1.0uL 426.282 0.950 0.945 0.479 172.259 0 2 -6.418 18.836 20.030 2.069 9
data_w_calibs %>% 
  # pull out seed oxidation in addition
  iso_unnest_calib_data(select = `Seed Oxidation`) %>% 
  iso_unnest_delta_calib_coefs(select = c(-statistic), keep_other_list_data = FALSE) %>% 
  # arrange by term and Analysis to get a quick idea of what the numbers across analyses
  arrange(term, Analysis) %>% 
  kable(digits = 2)
Analysis Identifier 1 Identifier 2 ampl_sample_mean.mV Seed Oxidation term estimate std.error p.value signif
573 A6 0.5uL 312.95 1 (Intercept) 13.02 2.91 0 **
574 A6 0.5uL 271.29 1 (Intercept) 14.72 3.47 0 **
575 A6 0.5uL 315.31 1 (Intercept) 15.47 2.84 0 ***
576 A6 1.0uL 417.60 1 (Intercept) 12.10 2.67 0 **
577 A6 1.0uL 480.85 1 (Intercept) 9.64 1.55 0 **
578 A6 1.0uL 432.85 1 (Intercept) 14.23 2.83 0 ***
579 A6 1.0uL 452.56 1 (Intercept) 14.82 3.13 0 **
580 A6 1.0uL 426.28 1 (Intercept) 13.56 2.67 0 ***
573 A6 0.5uL 312.95 1 true.d13C 1.10 0.09 0 ***
574 A6 0.5uL 271.29 1 true.d13C 1.15 0.11 0 ***
575 A6 0.5uL 315.31 1 true.d13C 1.18 0.09 0 ***
576 A6 1.0uL 417.60 1 true.d13C 1.07 0.09 0 ***
577 A6 1.0uL 480.85 1 true.d13C 0.96 0.05 0 ***
578 A6 1.0uL 432.85 1 true.d13C 1.13 0.09 0 ***
579 A6 1.0uL 452.56 1 true.d13C 1.15 0.10 0 ***
580 A6 1.0uL 426.28 1 true.d13C 1.11 0.08 0 ***

Parameters

data_params <- data_w_calibs %>% 
  # pull out remaining columns we want available (some might already be pulled out but that's okay)
  #Note: Garrett changed "Preparation" to "Seed Oxidation" - we may want a different shape variable
  iso_unnest_calib_data(select = c(file_datetime, ampl_sample_mean.mV, `Seed Oxidation`, is_standard)) 
  
# visualize the delta calibration fits
data_params %>% 
  #NEED TO FILTER OUT SAMPLES iso_filter_files(`Identifier 1` == "A5") 
  iso_visualize_delta_calib_fits(x = Analysis, color = `Identifier 2`, shape = `Seed Oxidation`, size = ampl_sample_mean.mV,
                                 include_from_summary = c(adj.r.squared, deviance)) + labs(title = "parameters vs. analysis")

data_params %>% 
  iso_visualize_delta_calib_fits(x = ampl_sample_mean.mV, color = `Identifier 2`, shape = `Seed Oxidation`, size = ampl_sample_mean.mV,
                                 include_from_summary = c(adj.r.squared, deviance)) + labs(title = "parameters vs. amplitude") 

data_params %>% 
  iso_visualize_delta_calib_fits(x = file_datetime, color = `Identifier 2`, shape = `Seed Oxidation`, size = ampl_sample_mean.mV,
                                 include_from_summary = c(adj.r.squared, deviance)) + labs(title = "parameters vs. time")

# turn the last plot into an interactive one
ggplotly(ggplot2::last_plot() + theme(legend.position = "none"))
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Compounds

data_w_calibs %>% 
  # pull out relevant data
  iso_unnest_calib_data(select = everything()) %>% 
  filter(is_standard) %>% 
  # calculate deviation from means
  group_by(Analysis) %>% 
  mutate(
    `Var: residual d13C [permil]` = resid_d13C,
    `Var: area diff from mean [%]` = (`Intensity 44`/mean(`Intensity 44`) - 1) * 100,
    `Var: amplitude diff from mean [%]` = (`Ampl44`/mean(`Ampl44`) - 1) * 100
  ) %>%
  # visualize
  iso_visualize_data(x = compound, y = starts_with("Var"), group = Analysis, color = `Identifier 2`)

ggplotly(ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`